package data_deepprocessing.algorithm.bootstrapping.evaluate;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.zookeeper.server.quorum.Election;

import data_deepprocessing.algorithm.bootstrapping.db.BootSeedDB;
import data_deepprocessing.algorithm.crfs.CreateCRFsDataSet;
import data_deepprocessing.prepareData.beans.XianBingShi_new_zhangBean;
import data_deepprocessing.prepareData.db.InitSeedDB;
import data_deepprocessing.prepareData.db.XianBingShi_New_zhangDB;

/**
 * Generates the CRF data set used to evaluate the bootstrapping result.
 *
 * <p>The service loads the bootstrapped seeds and the XianBingShi records from their
 * DAOs, splits and de-duplicates the seeds, and passes everything to
 * {@link CreateCRFsDataSet#doCreateCrfDataUpGradeThree2CRFNew}. All collaborators are
 * supplied through the setters below and must be set before
 * {@link #generateStandardDataSet()} is called.
 *
 * <p>Created: 2017-04-15 08:40:04
 *
 * @author YUHU YUAN
 * @version 1.0
 */

public class GenerateEvaluateDataSetService {

	private static final Logger LOGGER =
			Logger.getLogger(GenerateEvaluateDataSetService.class.getName());

	/** Separator on which raw bootstrapped seeds are split into individual seed words. */
	private static final String SEED_SEPARATOR = "S";

	/** Seed words shorter than this (after trimming) are discarded. */
	private static final int MIN_SEED_LENGTH = 2;

	// Original author's note: the commented-out code is still useful, it is just not used for now.
//	private static final String standardDataSetPath = "D:\\yyh_yuanyuhu_graduation_experimental\\Bootstrapping\\dataset1\\StandardDataSet";
	/** Output location passed to the CRF data set generator for the bootstrapped seeds. */
	private static final String RESULT_DATA_SET_PATH = "D:\\yyh_yuanyuhu_graduation_experimental\\Bootstrapping\\dataset1\\result";

	// Original author's note: compared with the original init seeds, this source holds
	// half of the init seeds plus the seeds generated by the patterns.
	private BootSeedDB bootSeedDB;

	private XianBingShi_New_zhangDB xianBingShi_New_zhangDB;

	// Original author's note: all of the initial seeds, i.e. the batch of data that was
	// curated by hand after normalization.
	private InitSeedDB initSeedDB;

	private CreateCRFsDataSet createCRFsDataSet;

	/** Loads the raw bootstrapped seed strings from {@link BootSeedDB}. */
	private List<String> doGetBootSeeds() {
		return bootSeedDB.doSelectBootstrappingSeedContent();
	}

	/** Loads every XianBingShi record from {@link XianBingShi_New_zhangDB}. */
	private List<XianBingShi_new_zhangBean> doGetXianBingShis() {
		return xianBingShi_New_zhangDB.selectALlXianBingShi();
	}

	/**
	 * Loads all initial seed strings from {@link InitSeedDB}.
	 *
	 * <p>Currently only needed by the disabled standard-data-set generation; kept so that
	 * step can be re-enabled without further changes.
	 */
	private List<String> doGetInitSeeds() {
		return initSeedDB.selectAllSeedContent();
	}

	/**
	 * Splits the raw bootstrapped seeds into individual, de-duplicated seed words.
	 *
	 * <p>Original author's note: most of the generated raw seeds contain the character
	 * {@code S}, which is very harmful for labelling and evaluation, so each raw seed is
	 * split on {@code S} to obtain the individual seed words.
	 *
	 * <p>Each fragment is trimmed <em>before</em> its length is checked, so whitespace-only
	 * fragments can no longer end up as empty seeds. The list returned by the DAO is left
	 * untouched; a fresh list is returned instead.
	 *
	 * @return a new mutable list of distinct seed words, each at least
	 *         {@value #MIN_SEED_LENGTH} characters long after trimming; empty if the DAO
	 *         returned {@code null} or no usable seeds. The order is the iteration order of
	 *         the backing {@link HashSet}.
	 */
	private List<String> handleBootSeeds() {
		List<String> rawSeeds = doGetBootSeeds();
		Set<String> seedSet = new HashSet<>();
		if (rawSeeds != null) {
			for (String rawSeed : rawSeeds) {
				if (rawSeed == null) {
					continue;
				}
				for (String fragment : rawSeed.split(SEED_SEPARATOR)) {
					String word = fragment.trim();
					if (word.length() >= MIN_SEED_LENGTH) {
						seedSet.add(word);
					}
				}
			}
		}
		return new ArrayList<>(seedSet);
	}

	/**
	 * Generates the CRF data set for the bootstrapped seeds under the result path.
	 *
	 * <p>The XianBingShi records are loaded first, then the cleaned seeds, and both are
	 * passed to {@link CreateCRFsDataSet#doCreateCrfDataUpGradeThree2CRFNew}. Generation is
	 * best effort: any exception thrown by the generator is logged and not propagated.
	 */
	public void generateStandardDataSet() {
		List<XianBingShi_new_zhangBean> xianbingshiInfos = doGetXianBingShis();
		List<String> bootSeeds = handleBootSeeds();

		// Generation of the standard data set from the init seeds is disabled for now:
//		createCRFsDataSet.doCreateCrfDataUpGradeThree2CRFNew(standardDataSetPath, doGetInitSeeds(), xianbingshiInfos, "S");

		try {
			createCRFsDataSet.doCreateCrfDataUpGradeThree2CRFNew(RESULT_DATA_SET_PATH, bootSeeds, xianbingshiInfos, "S");
		} catch (Exception e) {
			// Keep the original best-effort behaviour, but record which output failed.
			LOGGER.log(Level.SEVERE, "Failed to generate CRF data set at " + RESULT_DATA_SET_PATH, e);
		}
	}

	public BootSeedDB getBootSeedDB() {
		return bootSeedDB;
	}

	public void setBootSeedDB(BootSeedDB bootSeedDB) {
		this.bootSeedDB = bootSeedDB;
	}

	public XianBingShi_New_zhangDB getXianBingShi_New_zhangDB() {
		return xianBingShi_New_zhangDB;
	}

	public void setXianBingShi_New_zhangDB(XianBingShi_New_zhangDB xianBingShi_New_zhangDB) {
		this.xianBingShi_New_zhangDB = xianBingShi_New_zhangDB;
	}

	public InitSeedDB getInitSeedDB() {
		return initSeedDB;
	}

	public void setInitSeedDB(InitSeedDB initSeedDB) {
		this.initSeedDB = initSeedDB;
	}

	public CreateCRFsDataSet getCreateCRFsDataSet() {
		return createCRFsDataSet;
	}

	public void setCreateCRFsDataSet(CreateCRFsDataSet createCRFsDataSet) {
		this.createCRFsDataSet = createCRFsDataSet;
	}

}




















